package com.ddwanglife.ml.third

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.DataFrame

/**
 * Minimal Spark SQL example: load a CSV file into a DataFrame, print its
 * schema, then cast selected columns to explicit types and print again.
 *
 * Usage: the first command-line argument, if present, overrides the CSV
 * path; otherwise the original hard-coded default is used.
 */
object DataFrameDemo {
  def main(args: Array[String]): Unit = {
    // Allow the CSV path to be supplied on the command line; keep the
    // original hard-coded path as the backward-compatible default.
    val csvPath = args.headOption.getOrElse("E:\\ml\\customer.csv")

    val spark = SparkSession.builder()
      .appName("SparkSQLBasicExample")
      .master("local")
      .getOrCreate()

    try {
      // Create a DataFrame from the CSV; the first row holds column names.
      val df1 = spark.read.option("header", "true").format("csv").load(csvPath)

      df1.printSchema()

      // Without inferSchema the CSV reader treats every column as a string,
      // so cast the columns we care about to explicit types.
      val df2 = df1.select(
        df1("name").cast("String"),
        df1("age").cast("Double"),
        df1("gender").cast("String")
      )
      df2.printSchema()

      /* Legacy alternative using SQLContext and the databricks CSV reader:
      val conf = new SparkConf()
      conf.setAppName("FilterAndWhere").setMaster("local")
      val sparkContext = new SparkContext(conf)
      val sqlContext = new SQLContext(sparkContext)
      val data = sqlContext.read.format("com.databricks.spark.csv")
        .option("header", "true")             // "true" if the first CSV row holds column names, otherwise "false"
        .option("inferSchema", true.toString) // infer each column's data type automatically
        .load("E:\\\\ml\\\\customer.csv")     // path to the file
      data.printSchema()
      */
    } finally {
      // Always release the SparkSession (and its underlying SparkContext),
      // even if reading or transforming the DataFrame throws.
      spark.stop()
    }
  }
}
